# -*- coding: UTF-8 -*-
'''
@Project ：weibo_opinion_analysis 
@File    ：run_main.py
@IDE     ：PyCharm
@Author  ：卷心菜
@Date    ：2023/10/13 11:19 
'''
import pandas as pd

from util.data_clean import run_dataclean_main
from util.LDA import run_LAD_main
from util import data_clean
from util import LDA

import os


def output_filter_file():
    """
    Select the cleaned workbooks found in the output directory.

    Returns:
        list: bare file names (not full paths) of the entries in
        ``../data/output`` whose text after the last underscore is exactly
        ``clean.xlsx``, in the order ``os.listdir`` reports them.
    """
    return [
        name
        for name in os.listdir('../data/output')
        # Only the segment after the final '_' decides whether a file counts.
        if name.rsplit('_', 1)[-1] == 'clean.xlsx'
    ]


def input_filter():
    """
    Select the CSV files found in the input directory.

    Returns:
        list: bare file names (not full paths) of the entries in
        ``../data/input`` whose text after the last dot is exactly ``csv``
        (case-sensitive), in the order ``os.listdir`` reports them.
    """
    return [
        name
        for name in os.listdir('../data/input')
        # Only the segment after the final '.' decides whether a file counts.
        if name.rsplit('.', 1)[-1] == 'csv'
    ]


def data_cleanAll():
    """
    Run the cleaning step on every CSV file in the input directory.

    Each name returned by ``input_filter()`` is prefixed with
    ``../data/input/`` and the resulting path is handed to
    ``data_clean.run_dataclean_main``.
    """
    input_dir = '../data/input/'
    for csv_name in input_filter():
        data_clean.run_dataclean_main(input_dir + csv_name)


def Part_LAD_all():
    """
    Run the LDA step separately on each cleaned workbook (per-file mode).

    Each name returned by ``output_filter_file()`` is prefixed with
    ``../data/output/`` and the resulting path is handed to
    ``LDA.run_LAD_main``, one call per file.
    """
    output_dir = '../data/output/'
    for workbook_name in output_filter_file():
        LDA.run_LAD_main(output_dir + workbook_name)


def Con_LAD_all():
    """
    Merge every cleaned workbook into one file and run the LDA step on it.

    Each file named by ``output_filter_file()`` is read from
    ``../data/output/`` with ``pd.read_excel``. The frames are stacked
    row-wise, written to ``../data/input/data_all.xlsx`` without the index
    column, and that path is passed to ``run_LAD_main``.

    Returns:
        None
    """
    # Read everything first and concatenate once: calling pd.concat inside
    # the loop re-copies all previously accumulated rows on every iteration.
    frames = [
        pd.read_excel('../data/output/' + file)
        for file in output_filter_file()
    ]
    # pd.concat raises ValueError on an empty list, so fall back to an empty
    # frame; an (empty) workbook is still written when no files matched.
    df = pd.concat(frames) if frames else pd.DataFrame()

    path = '../data/input/data_all.xlsx'
    df.to_excel(path, index=False)
    run_LAD_main(path)


if __name__ == '__main__':
    # Step 1: run the cleaning step over every CSV in ../data/input.
    data_cleanAll()
    # Step 2: merge the filtered output workbooks and run LDA once on the
    # merged file. Alternative: Part_LAD_all() runs LDA file by file instead.
    Con_LAD_all()
    # To re-run only the LDA step on an already merged workbook:
    # path='../data/input/data_all.xlsx'
    # run_LAD_main(path)

